package cetcbigdata.da.service.xinjiang.spider3;

import cetcbigdata.da.base.Base;
import cn.wanghaomiao.xpath.model.JXDocument;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.nodes.Element;

import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * Crawler for list pages of the Xinjiang government-service site
 * ({@code zwfw.xinjiang.gov.cn}).
 *
 * <p>Each configured page is fetched, the rows under
 * {@code //tbody[@id='pricesheetlist']} are grouped into items, and the
 * grouped result is handed to the inherited {@code processResult} helper.
 */
public class Spider3 extends Base {

    /** XPath selecting every table row that carries item data. */
    private static final String ROW_XPATH = "//tbody[@id='pricesheetlist']//tr";

    /**
     * No-op: this spider does not push any records to the sender.
     *
     * @param recordSender unused
     * @param fieldsList   unused
     */
    @Override
    public void getData(RecordSender recordSender, List<Object> fieldsList) {

    }

    /**
     * Runs the list-page crawl and returns its result.
     *
     * @return the value produced by {@link #parseListPage()}, or {@code null}
     *         when the crawl throws (the stack trace is printed) or yields nothing
     */
    @Override
    public JSONObject getDataDemo() {
        try {
            return parseListPage();
        } catch (Exception e) {
            // Best-effort demo entry point: report the failure and fall through to null.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Fetches a configured list page, groups its table rows into items and
     * passes the grouped result to {@code processResult}.
     *
     * <p>NOTE(review): the method returns from inside the page loop, so only
     * the first configured page is ever fetched; the second entry is currently
     * unreachable. This is kept as-is because the return type carries a single
     * result - confirm whether every page should be processed.
     *
     * @return the value returned by {@code processResult} for the first page,
     *         or {@code null} when the page list is empty
     */
    private JSONObject parseListPage() {
        String text = "[{'name':'市场主体登记后置审批事项目录','url':'https://zwfw.xinjiang.gov.cn/xjzwfw2021/scztdjhzspsxml.html'}," +
                       "{'name':'政务服务事项精简申报材料清单（2020年度）','url':'https://zwfw.xinjiang.gov.cn/xjzwfw2021/sxcljj.html'}]";
        JSONArray pageList = JSONArray.parseArray(text);

        JSONObject header = new JSONObject();
        header.put("Referer", "https://zwfw.xinjiang.gov.cn/xjzwfw2021/fwqdnew/inventory.html");
        header.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36");

        for (Object entry : pageList) {
            JSONObject page = (JSONObject) entry;
            String url = page.getString("url");
            String module = page.getString("name");

            String response = getReq(url, header, new JSONObject());
            // Re-interpret the body's ISO-8859-1 bytes as UTF-8.
            // NOTE(review): presumably getReq decodes the response as ISO-8859-1 - confirm.
            response = new String(response.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);

            JXDocument jxDocument = new JXDocument(response);
            List<Object> rows = jxDocument.sel(ROW_XPATH);

            JSONObject result = new JSONObject();
            result.put("名称", module);
            result.put("选项", collectTasks(rows));
            System.out.println(result);
            return processResult(
                    null,
                    null,
                    url,
                    response,
                    result,
                    "新疆维吾尔自治区",
                    module
            );
        }
        return null;
    }

    /**
     * Groups table rows into items using the {@code rowspan} of the first cell
     * of each leading row.
     *
     * <p>A leading row starts a new item and supplies its first three cells;
     * the following {@code rowspan - 1} rows each contribute their first cell
     * as an additional {@code 设定依据} entry.
     *
     * @param rows row elements as returned by the XPath selection
     * @return one JSON object per item, including the final item of the table
     */
    private JSONArray collectTasks(List<Object> rows) {
        JSONArray tasks = new JSONArray();
        JSONObject current = new JSONObject();
        JSONArray bylaws = new JSONArray();
        // Rows still belonging to the current item; 0 means the next row starts a new item.
        int remaining = 0;

        for (Object rowObject : rows) {
            Element row = (Element) rowObject;
            if (remaining == 0) {
                if (current.size() > 0) {
                    tasks.add(current);
                }
                current = new JSONObject();
                bylaws = new JSONArray();
                remaining = leadingRowSpan(row.child(0));

                current.put("序号", row.child(0).text());
                current.put("事项名称", row.child(1).text());
                current.put("实施机关", row.child(2).text());
                // NOTE(review): the third cell is stored both as 实施机关 and as the
                // first 设定依据 entry - confirm against the page's column layout.
                bylaws.add(row.child(2).text());
            } else {
                bylaws.add(row.child(0).text());
            }
            current.put("设定依据", bylaws);
            remaining--;
        }

        // Flush the item still being built: without this the last item of the
        // table is silently dropped because nothing follows it to trigger the add.
        if (current.size() > 0) {
            tasks.add(current);
        }
        return tasks;
    }

    /**
     * Reads the {@code rowspan} attribute of a cell.
     *
     * @param cell first cell of a leading row
     * @return the parsed {@code rowspan}, or {@code 1} when the attribute is
     *         missing or not an integer
     */
    private static int leadingRowSpan(Element cell) {
        try {
            return Integer.parseInt(cell.attr("rowspan"));
        } catch (NumberFormatException e) {
            // Missing or malformed rowspan: the item occupies a single row.
            return 1;
        }
    }

    /**
     * Manual entry point that runs the demo crawl once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Spider3 spider3 = new Spider3();
        spider3.getDataDemo();
    }
}
